library(tswge)
library(dplyr)
library(tidyverse)
library(readxl)
library(vars)
library(nnfor)
# Set this to data folder for your file
#setwd("/Users/riccoferraro/Documents/SMU/TimeSeries/TimeSeriesFinalProject/data")
setwd("/Users/mingyang/Desktop/SMU/TimeSeries/TimeSeriesFinalProject/data")
# Read in DFWA electricity Data
# Read in the DFW-Arlington average electricity price series.
# Both columns are read as character; AVG_EP is converted to numeric after merging.
dfwa.electricity <- read.csv(
  "AVG_ELEC_DFWA_TX.csv",
  col.names = c("DATE", "AVG_EP"),
  colClasses = c(DATE = "character", AVG_EP = "character")
)
# Read in CPI data for the Southern Urban area (first 11 rows are sheet header).
cpi <- read_excel("SouthernUrbanCPI.xlsx", sheet = "BLS Data Series", skip = 11)
# Keep only monthly rows: drop the half-year aggregates (Period starting with "S")
# and the annual summary rows (Period "M13") in a single filter call.
cpi <- cpi %>% filter(!grepl("S", Period), !grepl("M13", Period))
# Read in gas price data from the same area.
gas.price <- read.csv("Dallas_FWA_GAS.csv")
# Read in temperature data; it arrives wide (one column per month), so reshape
# to long format with one Temperature value per row.
temp <- read_excel("DallasAreaTemp.xlsx", sheet = "Sheet1") %>%
  tidyr::pivot_longer(
    cols = starts_with("Mon_"),
    names_to = "Month",
    values_to = "Temperature"
  )
# Keep the first 386 monthly observations to match the other series' window.
temp <- temp[1:386, ]
#### Subset each series to a common 386-month window starting 1990-01 ####
# The window length (386 months) matches the truncated temperature series above.
n.window = 386
# Locate the 1990-01 observation in each source instead of hard-coding row
# numbers (the former magic indices 135 / 91 / 145 silently break if a source
# file gains or loses rows).
start.elec = which(dfwa.electricity$DATE == "1990-01-01")   # previously 135
start.cpi = which(cpi$Year == 1990 & cpi$Period == "M01")   # previously 91
start.gas = which(gas.price$DATE == "1990-01-01")           # previously 145
stopifnot(length(start.elec) == 1, length(start.cpi) == 1, length(start.gas) == 1)
dfwa.electricity = dfwa.electricity[start.elec:(start.elec + n.window - 1),]
rownames(dfwa.electricity) <- 1:nrow(dfwa.electricity)
cpi = cpi[start.cpi:(start.cpi + n.window - 1),]
rownames(cpi) <- 1:nrow(cpi)
gas.price = gas.price[start.gas:(start.gas + n.window - 1),]
rownames(gas.price) <- 1:nrow(gas.price)
#### Creating ultimate data frame under variable 'df' ####
# All four series are now row-aligned by month, so columns bind directly.
df = dfwa.electricity
df$CPI = cpi$Value
df$GAS_P = gas.price$APUS37A7471A
df$AVG_EP = as.numeric(df$AVG_EP)
df$TEMP = temp$Temperature
#### Due to distribution market deregulation in 1995, team decided to cut the realization
#### prior to 2000
start.2000 = which(df$DATE == "2000-01-01")                 # previously 121
df = df[start.2000:nrow(df),]
rownames(df) <- 1:nrow(df)
# Visual inspection of each series with tswge realization plots.
plotts.wge(df$AVG_EP)
plotts.wge(df$CPI)
plotts.wge(df$GAS_P)
plotts.wge(df$TEMP)
# There seems to be slowly damping autocorrelation behavior, which might support differencing the data
plotts.sample.wge(df$AVG_EP)
## $xbar
## [1] 0.1177444
##
## $autplt
## [1] 1.0000000 0.9602995 0.9151642 0.8723585 0.8295085 0.7865279 0.7574500
## [8] 0.7362720 0.7262652 0.7196580 0.7114933 0.7017810 0.6879721 0.6568276
## [15] 0.6238992 0.5937430 0.5625050 0.5322346 0.5184125 0.5104122 0.5059540
## [22] 0.5026686 0.4994694 0.4952331 0.4869328 0.4606747
##
## $freq
## [1] 0.003759398 0.007518797 0.011278195 0.015037594 0.018796992 0.022556391
## [7] 0.026315789 0.030075188 0.033834586 0.037593985 0.041353383 0.045112782
## [13] 0.048872180 0.052631579 0.056390977 0.060150376 0.063909774 0.067669173
## [19] 0.071428571 0.075187970 0.078947368 0.082706767 0.086466165 0.090225564
## [25] 0.093984962 0.097744361 0.101503759 0.105263158 0.109022556 0.112781955
## [31] 0.116541353 0.120300752 0.124060150 0.127819549 0.131578947 0.135338346
## [37] 0.139097744 0.142857143 0.146616541 0.150375940 0.154135338 0.157894737
## [43] 0.161654135 0.165413534 0.169172932 0.172932331 0.176691729 0.180451128
## [49] 0.184210526 0.187969925 0.191729323 0.195488722 0.199248120 0.203007519
## [55] 0.206766917 0.210526316 0.214285714 0.218045113 0.221804511 0.225563910
## [61] 0.229323308 0.233082707 0.236842105 0.240601504 0.244360902 0.248120301
## [67] 0.251879699 0.255639098 0.259398496 0.263157895 0.266917293 0.270676692
## [73] 0.274436090 0.278195489 0.281954887 0.285714286 0.289473684 0.293233083
## [79] 0.296992481 0.300751880 0.304511278 0.308270677 0.312030075 0.315789474
## [85] 0.319548872 0.323308271 0.327067669 0.330827068 0.334586466 0.338345865
## [91] 0.342105263 0.345864662 0.349624060 0.353383459 0.357142857 0.360902256
## [97] 0.364661654 0.368421053 0.372180451 0.375939850 0.379699248 0.383458647
## [103] 0.387218045 0.390977444 0.394736842 0.398496241 0.402255639 0.406015038
## [109] 0.409774436 0.413533835 0.417293233 0.421052632 0.424812030 0.428571429
## [115] 0.432330827 0.436090226 0.439849624 0.443609023 0.447368421 0.451127820
## [121] 0.454887218 0.458646617 0.462406015 0.466165414 0.469924812 0.473684211
## [127] 0.477443609 0.481203008 0.484962406 0.488721805 0.492481203 0.496240602
## [133] 0.500000000
##
## $dbz
## [1] 12.5686893 12.3173433 11.8982305 11.3114379 10.5579811 9.6409858
## [7] 8.5676671 7.3524912 6.0218430 4.6198784 3.2132709 1.8890411
## [13] 0.7381312 -0.1737122 -0.8283465 -1.2512906 -1.4891883 -1.5921087
## [19] -1.6091720 -1.5896992 -1.5821599 -1.6303232 -1.7694859 -2.0245855
## [25] -2.4100298 -2.9302313 -3.5799832 -4.3442938 -5.1978261 -6.1046537
## [31] -7.0196173 -7.8927223 -8.6769837 -9.3376650 -9.8587949 -10.2438964
## [37] -10.5116138 -10.6894307 -10.8079797 -10.8966372 -10.9802874 -11.0772077
## [43] -11.1981223 -11.3463448 -11.5187772 -11.7074923 -11.9016724 -12.0897493
## [49] -12.2616000 -12.4105708 -12.5349379 -12.6383063 -12.7285740 -12.8154841
## [55] -12.9072621 -13.0071528 -13.1107990 -13.2054874 -13.2722777 -13.2914789
## [61] -13.2502907 -13.1493354 -13.0044255 -12.8426050 -12.6950565 -12.5905515
## [67] -12.5514804 -12.5924245 -12.7202475 -12.9346975 -13.2288848 -13.5893678
## [73] -13.9958765 -14.4210309 -14.8308363 -15.1871536 -15.4531999 -15.6016961
## [79] -15.6227241 -15.5269821 -15.3423318 -15.1058464 -14.8557019 -14.6258190
## [85] -14.4436045 -14.3297151 -14.2986430 -14.3593301 -14.5154180 -14.7649981
## [91] -15.0998965 -15.5047232 -15.9562371 -16.4240086 -16.8735912 -17.2727527
## [97] -17.5992900 -17.8466594 -18.0238263 -18.1492694 -18.2427639 -18.3189949
## [103] -18.3848575 -18.4401307 -18.4802628 -18.4998799 -18.4957898 -18.4686207
## [109] -18.4228343 -18.3655051 -18.3046120 -18.2475191 -18.1999932 -18.1657677
## [115] -18.1464773 -18.1417432 -18.1492234 -18.1645248 -18.1809693 -18.1893543
## [121] -18.1780207 -18.1336861 -18.0434445 -17.8979003 -17.6946141 -17.4404011
## [127] -17.1512365 -16.8496724 -16.5609037 -16.3089797 -16.1141036 -15.9911686
## [133] -15.9491882
# Try overfitting: fit a deliberately high-order AR(16) and inspect the factor
# table for near-unit-root factors that suggest differencing/seasonality.
est = est.ar.wge(df$AVG_EP, p=16, type='burg')
##
##
## Coefficients of AR polynomial:
## 1.1098 -0.0493 -0.1180 0.0863 -0.1701 0.1127 -0.0761 0.0744 0.0928 -0.2121 0.1797 0.3475 -0.4070 -0.0942 0.1370 -0.0341
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.9605B 1.0411 0.9605 0.0000
## 1-0.0146B+0.9208B^2 0.0079+-1.0421i 0.9596 0.2488
## 1+0.9509B+0.9131B^2 -0.5207+-0.9078i 0.9555 0.3329
## 1-1.6443B+0.9007B^2 0.9128+-0.5264i 0.9490 0.0833
## 1-0.9206B+0.8507B^2 0.5411+-0.9395i 0.9224 0.1668
## 1-0.9208B 1.0861 0.9208 0.0000
## 1+1.4952B+0.7387B^2 -1.0120+-0.5740i 0.8595 0.4179
## 1+0.8128B -1.2303 0.8128 0.5000
## 1+0.7275B -1.3745 0.7275 0.5000
## 1-0.6354B+0.1371B^2 2.3176+-1.3870i 0.3702 0.0858
##
##
# Compare the overfit factor table to the factors of a pure seasonal (1 - B^12)
factor.wge(phi = c(rep(0,11),1))
##
##
## Coefficients of AR polynomial:
## 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.0000B+1.0000B^2 0.5000+-0.8660i 1.0000 0.1667
## 1-1.0000B 1.0000 1.0000 0.0000
## 1-1.7321B+1.0000B^2 0.8660+-0.5000i 1.0000 0.0833
## 1+1.0000B+1.0000B^2 -0.5000+-0.8660i 1.0000 0.3333
## 1-0.0000B+1.0000B^2 0.0000+-1.0000i 1.0000 0.2500
## 1+1.7321B+1.0000B^2 -0.8660+-0.5000i 1.0000 0.4167
## 1+1.0000B -1.0000 1.0000 0.5000
##
##
# By using the overfitting method, the 1-B term has the largest absolute reciprocal root, which by itself supports differencing the data
# Additionally, there seems to be 1-B^12 seasonality, although some of the factors such as 1+B at system frequency 0.5 and 1+1.73B+1B^2 at system frequency 0.4167 are not as close to the unit circle. It might be worth exploring s = 12 as well
# First-difference (1 - B) the electricity price series.
d1 = artrans.wge(df$AVG_EP, phi.tr = 1)
# With the differenced data there seems to be some seasonal behavior at lag 12 left.
# NOTE(review): phi.tr = c(rep(0,11), 0.4) applies a PARTIAL seasonal transform
# (1 - 0.4B^12), not a full (1 - B^12) seasonal difference — confirm intentional.
d1.12 = artrans.wge(d1, phi.tr = c(rep(0,11),0.4))
dev.off()
## null device
## 1
acf(d1.12) # ACF looks about white noise
ljung.wge(d1.12, K=24) # K=24 rejects white noise hypothesis (p = 0.025)
## Obs 0.1177406 0.09510556 -0.02821038 0.03577436 -0.05663082 -0.05190919 0.007935022 -0.02846601 0.06039309 -0.1174017 0.04320217 -0.1317076 0.06581953 -0.06049459 0.02239541 -0.06491078 -0.04911588 -0.1121 -0.1665538 -0.03136583 -0.05822815 0.03840348 0.03680909 0.1222457
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 39.35395
##
## $df
## [1] 24
##
## $pval
## [1] 0.02506178
ljung.wge(d1.12, K=48) # K = 48 also rejects white noise (p = 0.0008)
## Obs 0.1177406 0.09510556 -0.02821038 0.03577436 -0.05663082 -0.05190919 0.007935022 -0.02846601 0.06039309 -0.1174017 0.04320217 -0.1317076 0.06581953 -0.06049459 0.02239541 -0.06491078 -0.04911588 -0.1121 -0.1665538 -0.03136583 -0.05822815 0.03840348 0.03680909 0.1222457 -0.009325486 -0.03049484 -0.01441944 0.03846121 -0.1092629 0.0477006 0.01390417 0.01907413 0.00746239 -0.02442159 0.01661073 0.1696413 0.03805096 0.0511036 0.03743145 -0.07413577 -0.1043787 -0.1778826 -0.08752393 -0.1099639 9.902894e-05 -0.04875424 0.09696807 0.1329072
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 48
##
## $chi.square
## [1] 84.83926
##
## $df
## [1] 48
##
## $pval
## [1] 0.0008252289
# Thus the residual structure requires further modeling
# est1.12 = aic5.wge(d1.12, p=0:15, q=0:5) # AIC picked p = 7, q=4
# est.1.12.bic = aic5.wge(d1.12, p=0:15, q=0:5, type = 'bic') # bic picked p=0, q=0
# est.1.12.aicc = aic5.wge(d1.12, p=0:15, q=0:5, type = 'aicc') # aicc picked p=1, q=0
# AIC leans towards a fancy model, while BIC leans towards white noise.
# Attempt something in the middle by fitting an AR(1) instead.
params.est = est.arma.wge(d1.12, p=1)
##
##
## Coefficients of AR polynomial:
## 0.1182
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.1182B 8.4627 0.1182 0.0000
##
##
acf(params.est$res) # residuals look about white noise visually
ljung.wge(params.est$res, K=24) # K=24 rejects white noise hypothesis (p = 0.030)
## Obs -0.01014382 0.08726767 -0.04465155 0.04663999 -0.05631173 -0.04714291 0.01811757 -0.03731123 0.0796301 -0.1331266 0.07436933 -0.1485251 0.09073813 -0.07272263 0.03809915 -0.06359699 -0.02927703 -0.08955453 -0.154185 -0.005329216 -0.06060576 0.04209565 0.01856966 0.1223151
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 38.64312
##
## $df
## [1] 24
##
## $pval
## [1] 0.02975776
ljung.wge(params.est$res, K=48) # K = 48 rejects null hypothesis of white noise (p = 0.003)
## Obs -0.01014382 0.08726767 -0.04465155 0.04663999 -0.05631173 -0.04714291 0.01811757 -0.03731123 0.0796301 -0.1331266 0.07436933 -0.1485251 0.09073813 -0.07272263 0.03809915 -0.06359699 -0.02927703 -0.08955453 -0.154185 -0.005329216 -0.06060576 0.04209565 0.01856966 0.1223151 -0.02076998 -0.02871357 -0.01578537 0.05426695 -0.1226772 0.06055979 0.006250035 0.01706408 0.008359714 -0.02802056 -0.0004448668 0.1679758 0.01269146 0.04367132 0.04119715 -0.06838042 -0.0771094 -0.1602163 -0.05547658 -0.102604 0.01950958 -0.06197392 0.08988801 0.1271899
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 48
##
## $chi.square
## [1] 79.38565
##
## $df
## [1] 48
##
## $pval
## [1] 0.002934686
# Try the fancier ARMA(7,4) model identified by AIC (overwrites params.est).
params.est = est.arma.wge(d1.12, p=7,q=4)
##
##
## Coefficients of AR polynomial:
## -0.8600 -0.1354 -0.9232 -0.6079 0.1714 -0.0515 -0.0085
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1+1.8898B+0.9402B^2 -1.0050+-0.2314i 0.9696 0.4640
## 1-0.7621B+0.8873B^2 0.4294+-0.9709i 0.9420 0.1837
## 1-0.3782B+0.0918B^2 2.0591+-2.5786i 0.3030 0.1428
## 1+0.1104B -9.0565 0.1104 0.5000
##
##
##
##
## Coefficients of MA polynomial:
## -1.0151 -0.3327 -1.0074 -0.7941
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.7997B+0.9310B^2 0.4295+-0.9432i 0.9649 0.1820
## 1+1.8148B+0.8530B^2 -1.0638+-0.2018i 0.9236 0.4702
##
##
acf(params.est$res) # residuals look about white noise
ljung.wge(params.est$res, K=24) # K=24 fail to reject white noise hypothesis (p = 0.88)
## Obs 0.0008823033 0.007168264 0.01503579 -0.002268612 -0.04788816 0.004909812 -0.009460663 0.01731718 -0.01596821 -0.05734878 0.0002766386 -0.04294852 0.006864266 -0.00473022 -0.04453687 -0.005459694 -0.0516052 -0.04690075 -0.1718459 -0.00778543 -0.05177915 0.0191305 0.05259443 0.08994917
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 16.1709
##
## $df
## [1] 24
##
## $pval
## [1] 0.8818087
ljung.wge(params.est$res, K=48) # K = 48 fail to reject null hypothesis of white noise (p = 0.14)
## Obs 0.0008823033 0.007168264 0.01503579 -0.002268612 -0.04788816 0.004909812 -0.009460663 0.01731718 -0.01596821 -0.05734878 0.0002766386 -0.04294852 0.006864266 -0.00473022 -0.04453687 -0.005459694 -0.0516052 -0.04690075 -0.1718459 -0.00778543 -0.05177915 0.0191305 0.05259443 0.08994917 0.01760575 -0.08784062 0.02203463 0.0346197 -0.103104 0.06085884 0.01483718 -0.008431453 0.01655047 -0.02007022 -0.02552383 0.2078729 -0.04307097 0.07704926 0.004884021 -0.02369241 -0.1080703 -0.107706 -0.09422072 -0.07229019 0.003069584 -0.0453473 0.07907943 0.134459
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 48
##
## $chi.square
## [1] 58.63666
##
## $df
## [1] 48
##
## $pval
## [1] 0.1397722
# All models are wrong, some are useful - we will proceed with the fancier ARMA(7,4) for now.
# Full model: AR(7) polynomial multiplied with the (1 - 0.4B^12) seasonal factor;
# the (1-B) difference is applied via d = 1 in fore.arima.wge.
model1.param = mult.wge(fac1 = params.est$phi, fac2 = c(rep(0,11),0.4))
# Short-term forecast: lastn = T with n.ahead = 3 forecasts the LAST 3
# observations of the series (indices 264:266).
pred.short = fore.arima.wge(df$AVG_EP, phi = model1.param$model.coef, theta = params.est$theta,
d = 1, n.ahead = 3, limits = T, lastn = T)
ASE.short = mean((df$AVG_EP[264:266]-pred.short$f)^2)
ASE.short # 3.184093e-05 -> 0.0000318 #New 5.988891e-05 -> 0.0000599
## [1] 5.988891e-05
# Long-term forecast: lastn = T with n.ahead = 36 forecasts indices 231:266.
pred.long = fore.arima.wge(df$AVG_EP, phi = model1.param$model.coef, theta = params.est$theta,
d = 1, n.ahead = 36, limits = T, lastn = T)
ASE.long = mean((df$AVG_EP[(266-36+1):266]-pred.long$f)^2)
ASE.long # 0.0001725023
## [1] 0.0001725023
# rolling.res.short = roll.win.rmse.wge(df$AVG_EP,phi = model1.param$model.coef, theta = params.est$theta, d = 1, horizon = 3 )
# rolling.res.short # RMSE = 0.004, ASE = 1.6e-0.5 -> 0.000016, nums of windows = 239
# rolling.res.long = roll.win.rmse.wge(df$AVG_EP,phi = model1.param$model.coef, theta = params.est$theta, d = 1, horizon = 36)
# rolling.res.long # RMSE = 0.014, ASE= 0.000196, num of windows = 206
# Trend test setup: regress price on time, then examine the detrended residuals.
# NOTE: x, t, d and x.z are reused by the transformed-regression and bootstrap
# trend tests below, so these global names must not change.
x = df$AVG_EP
n = length(x)
t = 1:n
d = lm(x~t)
# x.z are the residuals from the regression line (equivalent to residuals(d))
x.z = x-d$coefficients[1] - d$coefficients[2]*t
plotts.sample.wge(x.z)
## $xbar
## [1] -1.589301e-16
##
## $autplt
## [1] 1.0000000 0.9489817 0.8860380 0.8248767 0.7634339 0.7020367 0.6633172
## [8] 0.6378392 0.6311052 0.6288295 0.6223238 0.6178089 0.6051494 0.5567153
## [15] 0.5020588 0.4530965 0.4019963 0.3547485 0.3324767 0.3224324 0.3258818
## [22] 0.3325654 0.3401354 0.3471977 0.3461759 0.3106075
##
## $freq
## [1] 0.003759398 0.007518797 0.011278195 0.015037594 0.018796992 0.022556391
## [7] 0.026315789 0.030075188 0.033834586 0.037593985 0.041353383 0.045112782
## [13] 0.048872180 0.052631579 0.056390977 0.060150376 0.063909774 0.067669173
## [19] 0.071428571 0.075187970 0.078947368 0.082706767 0.086466165 0.090225564
## [25] 0.093984962 0.097744361 0.101503759 0.105263158 0.109022556 0.112781955
## [31] 0.116541353 0.120300752 0.124060150 0.127819549 0.131578947 0.135338346
## [37] 0.139097744 0.142857143 0.146616541 0.150375940 0.154135338 0.157894737
## [43] 0.161654135 0.165413534 0.169172932 0.172932331 0.176691729 0.180451128
## [49] 0.184210526 0.187969925 0.191729323 0.195488722 0.199248120 0.203007519
## [55] 0.206766917 0.210526316 0.214285714 0.218045113 0.221804511 0.225563910
## [61] 0.229323308 0.233082707 0.236842105 0.240601504 0.244360902 0.248120301
## [67] 0.251879699 0.255639098 0.259398496 0.263157895 0.266917293 0.270676692
## [73] 0.274436090 0.278195489 0.281954887 0.285714286 0.289473684 0.293233083
## [79] 0.296992481 0.300751880 0.304511278 0.308270677 0.312030075 0.315789474
## [85] 0.319548872 0.323308271 0.327067669 0.330827068 0.334586466 0.338345865
## [91] 0.342105263 0.345864662 0.349624060 0.353383459 0.357142857 0.360902256
## [97] 0.364661654 0.368421053 0.372180451 0.375939850 0.379699248 0.383458647
## [103] 0.387218045 0.390977444 0.394736842 0.398496241 0.402255639 0.406015038
## [109] 0.409774436 0.413533835 0.417293233 0.421052632 0.424812030 0.428571429
## [115] 0.432330827 0.436090226 0.439849624 0.443609023 0.447368421 0.451127820
## [121] 0.454887218 0.458646617 0.462406015 0.466165414 0.469924812 0.473684211
## [127] 0.477443609 0.481203008 0.484962406 0.488721805 0.492481203 0.496240602
## [133] 0.500000000
##
## $dbz
## [1] 12.0998003 11.8768937 11.5060831 10.9887309 10.3274249 9.5267949
## [7] 8.5947728 7.5444633 6.3967899 5.1839353 3.9530584 2.7684781
## [13] 1.7084627 0.8518069 0.2541335 -0.0745938 -0.1743568 -0.1177711
## [19] 0.0144354 0.1506087 0.2347710 0.2264534 0.0979525 -0.1686171
## [25] -0.5835238 -1.1505684 -1.8670628 -2.7225211 -3.6959803 -4.7522406
## [31] -5.8384878 -6.8849898 -7.8152450 -8.5671197 -9.1146917 -9.4734693
## [37] -9.6849974 -9.7957781 -9.8450866 -9.8632716 -9.8747239 -9.9004990
## [43] -9.9588765 -10.0643331 -10.2259892 -10.4462858 -10.7202438 -11.0355161
## [49] -11.3735133 -11.7119071 -12.0284324 -12.3050393 -12.5306170 -12.7007014
## [55] -12.8141695 -12.8688826 -12.8591215 -12.7768723 -12.6169175 -12.3831459
## [61] -12.0922015 -11.7721162 -11.4569520 -11.1806198 -10.9724449 -10.8551599
## [67] -10.8446711 -10.9506198 -11.1769803 -11.5222336 -11.9788437 -12.5318600
## [73] -13.1565860 -13.8156409 -14.4567368 -15.0142647 -15.4190440 -15.6175395
## [79] -15.5923708 -15.3692278 -15.0045085 -14.5644123 -14.1091339 -13.6864056
## [85] -13.3313740 -13.0688944 -12.9160214 -12.8839002 -12.9789154 -13.2031287
## [91] -13.5540331 -14.0236183 -14.5967762 -15.2492900 -15.9461957 -16.6421910
## [97] -17.2864000 -17.8325758 -18.2515646 -18.5385622 -18.7097299 -18.7909681
## [103] -18.8068600 -18.7752305 -18.7074038 -18.6113240 -18.4946431 -18.3661466
## [109] -18.2353212 -18.1108378 -17.9989737 -17.9027150 -17.8218228 -17.7537556
## [115] -17.6950803 -17.6428399 -17.5953139 -17.5517708 -17.5111609 -17.4701294
## [121] -17.4211213 -17.3516003 -17.2454122 -17.0867976 -16.8662352 -16.5856744
## [127] -16.2602995 -15.9157677 -15.5825591 -15.2902399 -15.0635146 -14.9203412
## [133] -14.8714355
# Trend significance accounting for serial correlation (Cochrane-Orcutt style):
# fit an AR to the detrended residuals, transform both x and t by that AR
# operator, and refit the trend regression on the transformed series.
ar.z = aic.wge(x.z, p=0:15)
x.trans = artrans.wge(x, phi.tr = ar.z$phi)
t.trans = artrans.wge(t, phi.tr = ar.z$phi)
fit = lm(x.trans~ t.trans)
summary(fit)
##
## Call:
## lm(formula = x.trans ~ t.trans)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.014364 -0.002770 -0.000122 0.002314 0.016179
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.724e-03 6.588e-04 13.243 < 2e-16 ***
## t.trans 1.957e-04 4.296e-05 4.554 8.22e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.004589 on 250 degrees of freedom
## Multiple R-squared: 0.07661, Adjusted R-squared: 0.07291
## F-statistic: 20.74 on 1 and 250 DF, p-value: 8.22e-06
# After accounting for the serial correlation (AR(14) chosen by aic.wge), there is strong evidence that the slope is significantly different from zero (p < 0.0001). So we will take the trend out and proceed.
# Cross-check with a bootstrap-based trend test (fixed seed sn = 103 for reproducibility).
wbg.boot.wge(x, sn=103)
## $p
## [1] 2
##
## $phi
## [1] 1.1384970 -0.1616007
##
## $pv
## [1] 0.1077694
# The bootstrap-based trend test failed to reject with p-value = 0.1078; however we will proceed to try out how using the trend helps.
# Fit AR(14) to the detrended residuals (order chosen by aic.wge above).
ar.est = est.ar.wge(x = x.z, p=14, type = 'burg')
##
##
## Coefficients of AR polynomial:
## 1.0829 -0.0764 -0.0694 0.0897 -0.1963 0.1276 -0.0738 0.0629 0.1052 -0.2335 0.1907 0.3402 -0.3996 -0.0021
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.6619B+0.9161B^2 0.9071+-0.5185i 0.9571 0.0826
## 1-1.9020B+0.9086B^2 1.0467+-0.0712i 0.9532 0.0108
## 1-0.0219B+0.9024B^2 0.0121+-1.0526i 0.9500 0.2482
## 1+0.9609B+0.8985B^2 -0.5348+-0.9094i 0.9479 0.3346
## 1-0.9484B+0.8455B^2 0.5608+-0.9318i 0.9195 0.1638
## 1+0.9010B -1.1099 0.9010 0.5000
## 1+1.5840B+0.7807B^2 -1.0145+-0.5017i 0.8836 0.4269
## 1+0.0053B -189.3618 0.0053 0.5000
##
##
dev.off()
## null device
## 1
acf(ar.est$res)
# Ljung-Box on the AR(14) residuals at the default K = 24
ljung.wge(ar.est$res)
## Obs -0.003038258 0.03005427 -0.02099706 -0.0002871221 0.05986671 0.01695036 0.0530138 0.02703745 -0.004438696 0.004995789 -0.0291746 -0.1051625 0.01936145 -0.04933376 0.02083684 -0.04027929 0.03924574 -0.02867814 -0.1059654 0.0128511 -0.06517079 0.04325944 0.02233475 0.07069181
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 14.5005
##
## $df
## [1] 24
##
## $pval
## [1] 0.934527
# Ljung-Box on the AR(14) residuals at K = 48
ljung.wge(ar.est$res, K=48)
## Obs -0.003038258 0.03005427 -0.02099706 -0.0002871221 0.05986671 0.01695036 0.0530138 0.02703745 -0.004438696 0.004995789 -0.0291746 -0.1051625 0.01936145 -0.04933376 0.02083684 -0.04027929 0.03924574 -0.02867814 -0.1059654 0.0128511 -0.06517079 0.04325944 0.02233475 0.07069181 -0.02561514 -0.02072488 -0.02436513 0.07604687 -0.1057342 0.1075884 0.06789439 0.01064667 0.03701851 -0.04101051 -0.02696131 0.1481545 -0.02702826 0.07297504 -0.015737 -0.03237783 -0.06675938 -0.1181091 -0.0329735 -0.1023404 -0.008890106 -0.05093476 0.05829844 0.1212425
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 48
##
## $chi.square
## [1] 51.63008
##
## $df
## [1] 48
##
## $pval
## [1] 0.3338754
# both tests FAIL to reject white noise (p = 0.93 at K=24, p = 0.33 at K=48), so the AR(14) residuals look white
# Model 2: signal-plus-noise (linear trend + AR noise); lastn = TRUE with
# n.ahead = 3 forecasts the last 3 observations (indices 264:266).
m2.result = fore.sigplusnoise.wge(x, linear = TRUE, max.p = 15, n.ahead = 3, lastn = TRUE, limits=TRUE)
##
##
## Coefficients of AR polynomial:
## 1.0866 -0.0799 -0.0683 0.0920 -0.1896 0.1230 -0.0848 0.0812 0.0994 -0.2389 0.2041 0.3393 -0.4123
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.6637B+0.9197B^2 0.9044+-0.5189i 0.9590 0.0829
## 1-1.9090B+0.9150B^2 1.0432+-0.0685i 0.9566 0.0104
## 1-0.0231B+0.9079B^2 0.0127+-1.0494i 0.9528 0.2481
## 1+0.9692B+0.9021B^2 -0.5372+-0.9055i 0.9498 0.3352
## 1-0.9461B+0.8493B^2 0.5570+-0.9312i 0.9216 0.1642
## 1+0.9029B -1.1075 0.9029 0.5000
## 1+1.5832B+0.7800B^2 -1.0148+-0.5021i 0.8832 0.4269
##
##
ASE.short.m2 = mean((df$AVG_EP[264:266]-m2.result$f)^2)
ASE.short.m2 # 6.718437e-05 -> 0.0000672
## [1] 6.718437e-05
# Same signal-plus-noise model, long-term: forecasts the last 36 observations (231:266).
m2.result.long = fore.sigplusnoise.wge(x, linear = TRUE, max.p = 15, n.ahead = 36, lastn = TRUE, limits=TRUE)
##
##
## Coefficients of AR polynomial:
## 1.0866 -0.0799 -0.0683 0.0920 -0.1896 0.1230 -0.0848 0.0812 0.0994 -0.2389 0.2041 0.3393 -0.4123
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.6637B+0.9197B^2 0.9044+-0.5189i 0.9590 0.0829
## 1-1.9090B+0.9150B^2 1.0432+-0.0685i 0.9566 0.0104
## 1-0.0231B+0.9079B^2 0.0127+-1.0494i 0.9528 0.2481
## 1+0.9692B+0.9021B^2 -0.5372+-0.9055i 0.9498 0.3352
## 1-0.9461B+0.8493B^2 0.5570+-0.9312i 0.9216 0.1642
## 1+0.9029B -1.1075 0.9029 0.5000
## 1+1.5832B+0.7800B^2 -1.0148+-0.5021i 0.8832 0.4269
##
##
ASE.long.m2 = mean((df$AVG_EP[(266-36+1):266]-m2.result.long$f)^2)
ASE.long.m2 # 0.0001268439
## [1] 0.0001268439
# Train/test split for the multivariate models: first 230 months train, last 36
# months test. Columns 2:5 are AVG_EP, CPI, GAS_P, TEMP.
train.var = df[1:230,2:5]
test.var = df[231:266,2:5]
# Prior to fitting, look at cross-correlation plots to get an idea of lead/lag structure
ccf(train.var$CPI, train.var$AVG_EP)
# Looks like Gas Price lag 2 is most correlated with electricity price
ccf(train.var$GAS_P, train.var$AVG_EP)
# lag 0 or 1 of temperature seems most correlated with electricity price (although this correlation is weaker compared to Gas and CPI)
ccf(train.var$TEMP, train.var$AVG_EP)
# AIC and FPE selected lag = 11, Schwarz Criterion selected lag = 6, Hannan-Quinn Criterion selected lag = 3
# Try all three candidate lags below and compare their short- and long-term ASE
VARselect(train.var, lag.max=15, type="both", season = NULL, exogen = NULL)
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 11 6 3 11
##
## $criteria
## 1 2 3 4 5
## AIC(n) -1.283887e+01 -1.379340e+01 -1.430460e+01 -1.446431e+01 -1.457456e+01
## HQ(n) -1.268684e+01 -1.354002e+01 -1.394988e+01 -1.400824e+01 -1.401713e+01
## SC(n) -1.246261e+01 -1.316630e+01 -1.342667e+01 -1.333554e+01 -1.319495e+01
## FPE(n) 2.655678e-06 1.022636e-06 6.136368e-07 5.234925e-07 4.694594e-07
## 6 7 8 9 10
## AIC(n) -1.468531e+01 -1.461657e+01 -1.462841e+01 -1.469184e+01 -1.476153e+01
## HQ(n) -1.402654e+01 -1.385645e+01 -1.376693e+01 -1.372901e+01 -1.369735e+01
## SC(n) -1.305486e+01 -1.273529e+01 -1.249629e+01 -1.230887e+01 -1.212773e+01
## FPE(n) 4.210328e-07 4.521535e-07 4.483443e-07 4.226040e-07 3.962645e-07
## 11 12 13 14 15
## AIC(n) -1.484743e+01 -1.479751e+01 -1.484364e+01 -1.478650e+01 -1.472243e+01
## HQ(n) -1.368191e+01 -1.353063e+01 -1.347541e+01 -1.331692e+01 -1.315150e+01
## SC(n) -1.196279e+01 -1.166203e+01 -1.145733e+01 -1.114935e+01 -1.083443e+01
## FPE(n) 3.660212e-07 3.877759e-07 3.737362e-07 4.000256e-07 4.318945e-07
# Fit each candidate VAR lag with the same helper instead of three copy-pasted
# chunks; all global result names are preserved for the plotting code below.
fit.var.candidate = function(p) {
  # VAR(p) with constant + trend, plus 3-step and 36-step forecasts.
  fit = VAR(train.var, p = p, type = "both")
  list(fit = fit,
       short = predict(fit, n.ahead = 3),
       long = predict(fit, n.ahead = 36))
}
# Holdout ASE of the AVG_EP point forecasts over the first h test months.
var.ase = function(preds, h) {
  mean((preds$fcst$AVG_EP[, 1] - test.var$AVG_EP[1:h])^2)
}
# Try lag = 11
cand.p11 = fit.var.candidate(11)
lsfit.p11 = cand.p11$fit
preds.p11.short = cand.p11$short
preds.p11.long = cand.p11$long
test_ASE_p11_short = var.ase(preds.p11.short, 3)
test_ASE_p11_short # 1.40152e-05 -> 0.000014
test_ASE_p11_long = var.ase(preds.p11.long, 36)
test_ASE_p11_long # 0.0001447111
# Try lag = 6
cand.p6 = fit.var.candidate(6)
lsfit.p6 = cand.p6$fit
preds.p6.short = cand.p6$short
preds.p6.long = cand.p6$long
test_ASE_p6_short = var.ase(preds.p6.short, 3)
test_ASE_p6_short # 9.127952e-06 -> 0.00000913
test_ASE_p6_long = var.ase(preds.p6.long, 36)
test_ASE_p6_long # 0.0001166065
# Try lag = 3 (best on both horizons; p = 3 forecasts are used downstream)
cand.p3 = fit.var.candidate(3)
lsfit.p3 = cand.p3$fit
preds.p3.short = cand.p3$short
preds.p3.long = cand.p3$long
test_ASE_p3_short = var.ase(preds.p3.short, 3)
test_ASE_p3_short # 2.780987e-06 -> 0.000002781
test_ASE_p3_long = var.ase(preds.p3.long, 36)
test_ASE_p3_long # 9.241909e-05 -> 0.00009242
# Short Term Forecast - VAR best model (p = 3)
t = 200:233
plot(t, df$AVG_EP[200:233], type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(200, 236),ylim=c(0.10,0.16),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.1,1.1,1.1),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Short Term Average Electricity Price Forecast VAR")
# Red solid: point forecasts (fcst column 1), anchored at observation 230.
# Blue dotted: forecast interval bounds (fcst columns 3 and 2 per vars::predict).
points(230:233,c(df$AVG_EP[230:230],preds.p3.short$fcst$AVG_EP[,1]),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
points(230:233,c(df$AVG_EP[230:230],preds.p3.short$fcst$AVG_EP[,3]),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
points(230:233,c(df$AVG_EP[230:230],preds.p3.short$fcst$AVG_EP[,2]),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
# Long Term Forecast - VAR best model (p = 3)
t = 1:266
plot(t, df$AVG_EP[1:266], type='l',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(1, 266),ylim=c(0.05,0.18),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.0,1.0,1.0),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Long Term Average Electricity Price Forecast VAR")
points(230:266,c(df$AVG_EP[230:230],preds.p3.long$fcst$AVG_EP[,1]),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
points(230:266,c(df$AVG_EP[230:230],preds.p3.long$fcst$AVG_EP[,3]),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
points(230:266,c(df$AVG_EP[230:30],preds.p3.long$fcst$AVG_EP[,2]),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
# Convert to monthly ts objects for nnfor::mlp. Train = months 1:230 starting
# 2000-01 (so month 230 is 2019-02); test starts 2019-03.
df.train = ts(df[1:230,2], start = c(2000,1), frequency = 12)
df.test = ts(df[231:266,2], start=c(2019,3), frequency = 12)
other.predictors.train = df[1:230,3:5]
set.seed(2)
# MLP: 5 hidden nodes, 20 repetitions combined by median; lag 1 of each of the
# 3 exogenous regressors; no differencing, no deterministic seasonal dummies,
# and automatic lag selection turned off.
fit.mlp1 = mlp(df.train, reps = 20, comb = "median", xreg = other.predictors.train,
xreg.lags = c(1,1,1), allow.det.season = FALSE, hd = 5, difforder = 0, sel.lag = FALSE)
fit.mlp1
## MLP fit with 5 hidden nodes and 20 repetitions.
## Univariate lags: (1,2,3,4,5,6,7,8,9,10,11,12)
## 3 regressors included.
## - Regressor 1 lags: (1)
## - Regressor 2 lags: (1)
## - Regressor 3 lags: (1)
## Forecast combined using the median operator.
## MSE: 0.
plot(fit.mlp1)
# The exogenous regressors must themselves be forecast 36 months ahead before
# mlp() can forecast electricity price over the test window. One helper
# replaces three copy-pasted chunks; the seed is reset before each fit exactly
# as the original per-series code did, so RNG behavior is unchanged.
forecast.regressor = function(series) {
  set.seed(2)
  fit = mlp(ts(series[1:230], start = c(2000, 1), frequency = 12),
            reps = 20, comb = "median", allow.det.season = TRUE)
  list(fit = fit, fore = forecast(fit, h = 36))
}
# Forecast next 36 CPI
cpi.mlp = forecast.regressor(df$CPI)
fit.mlp.cpi = cpi.mlp$fit
fore.mlp.cpi = cpi.mlp$fore
plot(fore.mlp.cpi)
# Forecast next 36 Gas_P
gas.mlp = forecast.regressor(df$GAS_P)
fit.mlp.gas = gas.mlp$fit
fore.mlp.gas = gas.mlp$fore
plot(fore.mlp.gas)
# Forecast next 36 Temperature
temp.mlp = forecast.regressor(df$TEMP)
fit.mlp.temp = temp.mlp$fit
fore.mlp.temp = temp.mlp$fore
plot(fore.mlp.temp)
# Assemble the regressor matrix: 230 observed months followed by the 36
# forecast months for each exogenous series.
# BUG FIX: the third column was mistakenly named GAS_P a second time (data.frame
# silently renamed it GAS_P.1) even though it holds temperature data; it must be
# TEMP to match the training regressors df[, 3:5].
other.predictors.with.forecast = data.frame(CPI = c(df$CPI[1:230],fore.mlp.cpi$mean[1:36]),
GAS_P = c(df$GAS_P[1:230],fore.mlp.gas$mean[1:36]),
TEMP = c(df$TEMP[1:230],fore.mlp.temp$mean[1:36]))
# Short-term (3-month) and long-term (36-month) MLP forecasts, scored on the
# corresponding test months 231:233 and 231:266.
fore.mlp1.short = forecast(fit.mlp1, h=3, xreg = other.predictors.with.forecast)
test_ASE_mlp1_short = mean((fore.mlp1.short$mean[1:3]-df$AVG_EP[231:233])^2)
test_ASE_mlp1_short # 1.947679e-05 -> 0.00001947
## [1] 1.947679e-05
fore.mlp1.long = forecast(fit.mlp1, h=36, xreg = other.predictors.with.forecast)
test_ASE_mlp1_long = mean((fore.mlp1.long$mean[1:36]-df$AVG_EP[231:266])^2)
test_ASE_mlp1_long # 0.000111
## [1] 0.0001109964
# Short Term Forecast - MLP model
t = 200:233
plot(t, df$AVG_EP[200:233], type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(200, 236),ylim=c(0.10,0.16),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.1,1.1,1.1),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Short Term Average Electricity Price Forecast MLP")
points(230:233,c(df$AVG_EP[230:230],fore.mlp1.short$mean[1:3]),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
# Long Term Forecast - MLP model (comment fixed: this plots the MLP, not VAR)
t = 1:266
plot(t, df$AVG_EP[1:266], type='l',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(1, 266),ylim=c(0.05,0.18),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.0,1.0,1.0),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Long Term Average Electricity Price Forecast MLP")
points(230:266,c(df$AVG_EP[230:230],fore.mlp1.long$mean[1:36]),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
# Ensemble = simple average of the three model forecasts.
# BUG FIX: pred.short$f (fit with lastn = TRUE on the full series) forecasts
# observations 264:266, but the VAR and MLP components forecast the test window
# 231:233, and the ASE below is scored against 231:233. Recompute the ARIMA
# forecast from the 230-month training window so all three components predict
# the same months. (The printed short-term ASE will therefore differ from the
# earlier transcript value; the long-term pred.long already covers 231:266.)
pred.short.aligned = fore.arima.wge(df$AVG_EP[1:230], phi = model1.param$model.coef,
theta = params.est$theta, d = 1, n.ahead = 3, limits = F, lastn = F)
ensemble.forecast.short = (pred.short.aligned$f+preds.p3.short$fcst$AVG_EP[,1]+fore.mlp1.short$mean[1:3])/3
test_ASE_ensemble1.short = mean((ensemble.forecast.short-df$AVG_EP[231:233])^2)
test_ASE_ensemble1.short
# pred.long (lastn = TRUE, n.ahead = 36) forecasts 231:266, which matches the
# 36-month test window, so the long-horizon ensemble is aligned as-is.
ensemble.forecast.long = (pred.long$f+preds.p3.long$fcst$AVG_EP[,1]+fore.mlp1.long$mean[1:36])/3
test_ASE_ensemble1.long = mean((ensemble.forecast.long-df$AVG_EP[231:266])^2)
test_ASE_ensemble1.long # 7.819804e-05 -> 0.0000782
## [1] 7.819804e-05
# Short Term Forecast - Ensemble model
t = 200:233
plot(t, df$AVG_EP[200:233], type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(200, 236),ylim=c(0.10,0.16),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.1,1.1,1.1),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Short Term Average Electricity Price Forecast Ensemble")
points(230:233,c(df$AVG_EP[230:230],ensemble.forecast.short),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
# Long Term Forecast - Ensemble model
t = 1:266
plot(t, df$AVG_EP[1:266], type='l',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(1, 266),ylim=c(0.05,0.18),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.0,1.0,1.0),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Long Term Average Electricity Price Forecast Ensemble")
points(230:266,c(df$AVG_EP[230:230],ensemble.forecast.long),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
library(ggplot2)
# Parse the DATE strings into Date objects so ggplot can use a date axis.
df[['DATE']] <- as.Date(df[['DATE']], format='%Y-%m-%d')
# Convert short term prediction to dataframe
# lastn = F: pred.short holds 3 forecasts BEYOND the end of the sample.
# model1.param / params.est are fit earlier in the file (outside this chunk).
pred.short = fore.arima.wge(df$AVG_EP, phi = model1.param$model.coef, theta = params.est$theta,
d = 1, n.ahead = 3, limits = T, lastn = F)
dev.off()
# Base-graphics version of the short-term forecast: last 17 observations,
# then the ARIMA(15,1,5) forecast (red) with its limits (blue dotted),
# anchored at the final observation (t = 266).
t = 250:266
plot(t, df$AVG_EP[250:266], type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(250, 270),ylim=c(0.12,0.175),col=1)
axis(side=1,cex.axis=1.1,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.1,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.2,1.2,1.2),text=c("Time","",""),line=c(1.2,2.1,1.8))
points(266:269,c(df$AVG_EP[266:266],f.short.15.5$f),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
points(266:269,c(df$AVG_EP[266:266],f.short.15.5$ul),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
points(266:269,c(df$AVG_EP[266:266],f.short.15.5$ll),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
# 36-step out-of-sample forecast from the same model (used by later figures).
pred.long = fore.arima.wge(df$AVG_EP, phi = model1.param$model.coef, theta = params.est$theta,
d = 1, n.ahead = 36, limits = T, lastn = F)
# NOTE(review): pred.short forecasts periods 267:269 (out of sample) but is
# paired here with the last three OBSERVED dates (264:266) -- confirm this
# alignment is intended for the ggplot below.
pred.short.df = data.frame(Date = df$DATE[264:266], prediction = pred.short$f, upper = pred.short$ul, lower = pred.short$ll)
colors <- c("Actual Price" = "#00AFBB", "Predicted Price" = "#FC4E07", "Prediction Limit"= "#E7B800")
# Original TS plot: the full realization on a date axis.
ggplot(data = df[1:266,], aes(x=DATE, y=AVG_EP))+
geom_line(color="#00AFBB", size = 1) +
geom_point(color="#00AFBB") +
ggtitle("Electricity Price Over time")+xlab("Date")+ylab("Average Price") + theme_classic()
# Short-term forecast figure: actual prices (teal) for the last 7 months,
# ARIMA point forecasts (red) and upper/lower limits (yellow) from
# pred.short.df; legend colors come from the 'colors' vector defined above.
ggplot(data = df[260:266,], aes(x=DATE, y=AVG_EP, color="Actual Price"))+
geom_line( size = 1) +
ggtitle("Short term average Electricity price forecast")+xlab("Date")+ylab("Average Price") +
geom_line(data = pred.short.df,aes(x=Date, y=prediction), color="#FC4E07", size=1.1) +
geom_point(data = pred.short.df,aes(x=Date, y=prediction), color="#FC4E07", size=1.5)+
geom_line(data = pred.short.df,aes(x=Date, y=upper), color="#E7B800", size=1.1)+
geom_point(data = pred.short.df,aes(x=Date, y=upper), color="#E7B800", size=1.1)+
geom_line(data = pred.short.df,aes(x=Date, y=lower), color="#E7B800", size=1.1)+
geom_point(data = pred.short.df,aes(x=Date, y=lower), color="#E7B800", size=1.1)+
scale_color_manual(values = colors) + theme_classic()
# Sample plots (realization, sample autocorrelations, Parzen spectral
# density) of the raw price series; printed output follows below.
plotts.sample.wge(df$AVG_EP)
## $xbar
## [1] 0.1177444
##
## $autplt
## [1] 1.0000000 0.9602995 0.9151642 0.8723585 0.8295085 0.7865279 0.7574500
## [8] 0.7362720 0.7262652 0.7196580 0.7114933 0.7017810 0.6879721 0.6568276
## [15] 0.6238992 0.5937430 0.5625050 0.5322346 0.5184125 0.5104122 0.5059540
## [22] 0.5026686 0.4994694 0.4952331 0.4869328 0.4606747
##
## $freq
## [1] 0.003759398 0.007518797 0.011278195 0.015037594 0.018796992 0.022556391
## [7] 0.026315789 0.030075188 0.033834586 0.037593985 0.041353383 0.045112782
## [13] 0.048872180 0.052631579 0.056390977 0.060150376 0.063909774 0.067669173
## [19] 0.071428571 0.075187970 0.078947368 0.082706767 0.086466165 0.090225564
## [25] 0.093984962 0.097744361 0.101503759 0.105263158 0.109022556 0.112781955
## [31] 0.116541353 0.120300752 0.124060150 0.127819549 0.131578947 0.135338346
## [37] 0.139097744 0.142857143 0.146616541 0.150375940 0.154135338 0.157894737
## [43] 0.161654135 0.165413534 0.169172932 0.172932331 0.176691729 0.180451128
## [49] 0.184210526 0.187969925 0.191729323 0.195488722 0.199248120 0.203007519
## [55] 0.206766917 0.210526316 0.214285714 0.218045113 0.221804511 0.225563910
## [61] 0.229323308 0.233082707 0.236842105 0.240601504 0.244360902 0.248120301
## [67] 0.251879699 0.255639098 0.259398496 0.263157895 0.266917293 0.270676692
## [73] 0.274436090 0.278195489 0.281954887 0.285714286 0.289473684 0.293233083
## [79] 0.296992481 0.300751880 0.304511278 0.308270677 0.312030075 0.315789474
## [85] 0.319548872 0.323308271 0.327067669 0.330827068 0.334586466 0.338345865
## [91] 0.342105263 0.345864662 0.349624060 0.353383459 0.357142857 0.360902256
## [97] 0.364661654 0.368421053 0.372180451 0.375939850 0.379699248 0.383458647
## [103] 0.387218045 0.390977444 0.394736842 0.398496241 0.402255639 0.406015038
## [109] 0.409774436 0.413533835 0.417293233 0.421052632 0.424812030 0.428571429
## [115] 0.432330827 0.436090226 0.439849624 0.443609023 0.447368421 0.451127820
## [121] 0.454887218 0.458646617 0.462406015 0.466165414 0.469924812 0.473684211
## [127] 0.477443609 0.481203008 0.484962406 0.488721805 0.492481203 0.496240602
## [133] 0.500000000
##
## $dbz
## [1] 12.5686893 12.3173433 11.8982305 11.3114379 10.5579811 9.6409858
## [7] 8.5676671 7.3524912 6.0218430 4.6198784 3.2132709 1.8890411
## [13] 0.7381312 -0.1737122 -0.8283465 -1.2512906 -1.4891883 -1.5921087
## [19] -1.6091720 -1.5896992 -1.5821599 -1.6303232 -1.7694859 -2.0245855
## [25] -2.4100298 -2.9302313 -3.5799832 -4.3442938 -5.1978261 -6.1046537
## [31] -7.0196173 -7.8927223 -8.6769837 -9.3376650 -9.8587949 -10.2438964
## [37] -10.5116138 -10.6894307 -10.8079797 -10.8966372 -10.9802874 -11.0772077
## [43] -11.1981223 -11.3463448 -11.5187772 -11.7074923 -11.9016724 -12.0897493
## [49] -12.2616000 -12.4105708 -12.5349379 -12.6383063 -12.7285740 -12.8154841
## [55] -12.9072621 -13.0071528 -13.1107990 -13.2054874 -13.2722777 -13.2914789
## [61] -13.2502907 -13.1493354 -13.0044255 -12.8426050 -12.6950565 -12.5905515
## [67] -12.5514804 -12.5924245 -12.7202475 -12.9346975 -13.2288848 -13.5893678
## [73] -13.9958765 -14.4210309 -14.8308363 -15.1871536 -15.4531999 -15.6016961
## [79] -15.6227241 -15.5269821 -15.3423318 -15.1058464 -14.8557019 -14.6258190
## [85] -14.4436045 -14.3297151 -14.2986430 -14.3593301 -14.5154180 -14.7649981
## [91] -15.0998965 -15.5047232 -15.9562371 -16.4240086 -16.8735912 -17.2727527
## [97] -17.5992900 -17.8466594 -18.0238263 -18.1492694 -18.2427639 -18.3189949
## [103] -18.3848575 -18.4401307 -18.4802628 -18.4998799 -18.4957898 -18.4686207
## [109] -18.4228343 -18.3655051 -18.3046120 -18.2475191 -18.1999932 -18.1657677
## [115] -18.1464773 -18.1417432 -18.1492234 -18.1645248 -18.1809693 -18.1893543
## [121] -18.1780207 -18.1336861 -18.0434445 -17.8979003 -17.6946141 -17.4404011
## [127] -17.1512365 -16.8496724 -16.5609037 -16.3089797 -16.1141036 -15.9911686
## [133] -15.9491882
# slowly damping autocorrelations
# spectral density peaks at 0 (wandering behavior)
# possible seasonal peak at 1/12 = .0833333 (monthly)
# est.ar.wge(df$AVG_EP,p=15,type="burg")
# factor.wge(phi=c(rep(0,11),1))
# some evidence of s=12 in the overfit factor table, though Abs Recip would need to be higher
# Take the first difference (1-B) and inspect its ACF / spectrum.
AVE_EP_d1 = artrans.wge(df$AVG_EP,phi.tr=1)
dev.off()
## null device
## 1
acf(AVE_EP_d1)
plotts.sample.wge(AVE_EP_d1)
## $xbar
## [1] 0.0003509434
##
## $autplt
## [1] 1.000000000 0.143400409 0.007216805 0.004618813 -0.003852025
## [6] -0.190246498 -0.124240595 -0.175915478 -0.025667511 0.058883404
## [11] -0.083321390 0.090451630 0.411172282 0.100208511 -0.065665133
## [16] 0.023298835 -0.063084353 -0.181683095 -0.140869541 -0.222865159
## [21] -0.045233335 -0.018336048 -0.015243214 0.088588981 0.368127820
## [26] 0.038711866
##
## $freq
## [1] 0.003773585 0.007547170 0.011320755 0.015094340 0.018867925 0.022641509
## [7] 0.026415094 0.030188679 0.033962264 0.037735849 0.041509434 0.045283019
## [13] 0.049056604 0.052830189 0.056603774 0.060377358 0.064150943 0.067924528
## [19] 0.071698113 0.075471698 0.079245283 0.083018868 0.086792453 0.090566038
## [25] 0.094339623 0.098113208 0.101886792 0.105660377 0.109433962 0.113207547
## [31] 0.116981132 0.120754717 0.124528302 0.128301887 0.132075472 0.135849057
## [37] 0.139622642 0.143396226 0.147169811 0.150943396 0.154716981 0.158490566
## [43] 0.162264151 0.166037736 0.169811321 0.173584906 0.177358491 0.181132075
## [49] 0.184905660 0.188679245 0.192452830 0.196226415 0.200000000 0.203773585
## [55] 0.207547170 0.211320755 0.215094340 0.218867925 0.222641509 0.226415094
## [61] 0.230188679 0.233962264 0.237735849 0.241509434 0.245283019 0.249056604
## [67] 0.252830189 0.256603774 0.260377358 0.264150943 0.267924528 0.271698113
## [73] 0.275471698 0.279245283 0.283018868 0.286792453 0.290566038 0.294339623
## [79] 0.298113208 0.301886792 0.305660377 0.309433962 0.313207547 0.316981132
## [85] 0.320754717 0.324528302 0.328301887 0.332075472 0.335849057 0.339622642
## [91] 0.343396226 0.347169811 0.350943396 0.354716981 0.358490566 0.362264151
## [97] 0.366037736 0.369811321 0.373584906 0.377358491 0.381132075 0.384905660
## [103] 0.388679245 0.392452830 0.396226415 0.400000000 0.403773585 0.407547170
## [109] 0.411320755 0.415094340 0.418867925 0.422641509 0.426415094 0.430188679
## [115] 0.433962264 0.437735849 0.441509434 0.445283019 0.449056604 0.452830189
## [121] 0.456603774 0.460377358 0.464150943 0.467924528 0.471698113 0.475471698
## [127] 0.479245283 0.483018868 0.486792453 0.490566038 0.494339623 0.498113208
##
## $dbz
## [1] -1.120417257 -1.057321119 -0.967803862 -0.871366652 -0.788722954
## [6] -0.737328900 -0.727027932 -0.755566598 -0.803936769 -0.832929506
## [11] -0.785111771 -0.598357730 -0.231544277 0.311855412 0.982548849
## [16] 1.707474033 2.415388266 3.050760530 3.575864464 3.967301805
## [21] 4.211630518 4.301984419 4.236042901 4.015232297 3.645031667
## [26] 3.136374621 2.508200347 1.791000750 1.030379668 0.287870305
## [31] -0.365621850 -0.866725662 -1.180883021 -1.312713072 -1.296521583
## [36] -1.176282765 -0.990656767 -0.768813704 -0.533255163 -0.303994930
## [41] -0.100901678 0.056300912 0.149446791 0.163653216 0.088751394
## [46] -0.079643613 -0.339188280 -0.679708681 -1.081799949 -1.515313688
## [51] -1.938507184 -2.299460174 -2.541922372 -2.616659878 -2.495489134
## [56] -2.180831391 -1.704558815 -1.117208409 -0.474653659 0.171621987
## [61] 0.779050466 1.314716300 1.754229965 2.080053396 2.279937105
## [66] 2.345730221 2.272647177 2.059027183 1.706645504 1.221699857
## [71] 0.616681931 -0.086592792 -0.852639464 -1.627201727 -2.334032236
## [76] -2.879813628 -3.174836472 -3.166982246 -2.866661700 -2.339837877
## [81] -1.676706490 -0.962703359 -0.265241733 0.367438140 0.902603911
## [86] 1.318781847 1.602349308 1.745160510 1.743061309 1.595140367
## [91] 1.303644764 0.874585701 0.319147750 -0.343956826 -1.085220560
## [96] -1.860869906 -2.611439622 -3.266625145 -3.760214129 -4.051758377
## [101] -4.141254015 -4.064681630 -3.874998452 -3.623685594 -3.351483551
## [106] -3.086961339 -2.848371047 -2.645761586 -2.482366783 -2.355417603
## [111] -2.256930428 -2.175055600 -2.096363575 -2.008973862 -1.905767886
## [116] -1.786446623 -1.657390290 -1.529195151 -1.412773358 -1.315320336
## [121] -1.237242196 -1.170777151 -1.100883924 -1.008811114 -0.878045087
## [126] -0.700876001 -0.482625607 -0.241275313 -0.002837887 0.204940314
## [131] 0.357919662 0.438844743
# characteristic seasonal component for 12 months (ACF large at multiples of 12)
# Apply the seasonal (1-B^12) transform on top of the first difference.
AVE_EP_d1_s12 = artrans.wge(AVE_EP_d1,phi.tr=c(rep(0,11),1))
plotts.sample.wge(AVE_EP_d1_s12)
## $xbar
## [1] 9.881423e-05
##
## $autplt
## [1] 1.000000000 0.061881369 0.149355139 -0.067465091 0.065996069
## [6] 0.005480036 0.014532954 0.081574193 -0.007551414 0.110805568
## [11] -0.133491536 -0.005427407 -0.467952630 0.044216355 -0.078037716
## [16] 0.019932518 -0.072550894 0.058968103 -0.061592872 -0.153638588
## [21] -0.019250759 -0.075084909 0.074778865 -0.011612515 -0.027345280
## [26] -0.052413426
##
## $freq
## [1] 0.003952569 0.007905138 0.011857708 0.015810277 0.019762846 0.023715415
## [7] 0.027667984 0.031620553 0.035573123 0.039525692 0.043478261 0.047430830
## [13] 0.051383399 0.055335968 0.059288538 0.063241107 0.067193676 0.071146245
## [19] 0.075098814 0.079051383 0.083003953 0.086956522 0.090909091 0.094861660
## [25] 0.098814229 0.102766798 0.106719368 0.110671937 0.114624506 0.118577075
## [31] 0.122529644 0.126482213 0.130434783 0.134387352 0.138339921 0.142292490
## [37] 0.146245059 0.150197628 0.154150198 0.158102767 0.162055336 0.166007905
## [43] 0.169960474 0.173913043 0.177865613 0.181818182 0.185770751 0.189723320
## [49] 0.193675889 0.197628458 0.201581028 0.205533597 0.209486166 0.213438735
## [55] 0.217391304 0.221343874 0.225296443 0.229249012 0.233201581 0.237154150
## [61] 0.241106719 0.245059289 0.249011858 0.252964427 0.256916996 0.260869565
## [67] 0.264822134 0.268774704 0.272727273 0.276679842 0.280632411 0.284584980
## [73] 0.288537549 0.292490119 0.296442688 0.300395257 0.304347826 0.308300395
## [79] 0.312252964 0.316205534 0.320158103 0.324110672 0.328063241 0.332015810
## [85] 0.335968379 0.339920949 0.343873518 0.347826087 0.351778656 0.355731225
## [91] 0.359683794 0.363636364 0.367588933 0.371541502 0.375494071 0.379446640
## [97] 0.383399209 0.387351779 0.391304348 0.395256917 0.399209486 0.403162055
## [103] 0.407114625 0.411067194 0.415019763 0.418972332 0.422924901 0.426877470
## [109] 0.430830040 0.434782609 0.438735178 0.442687747 0.446640316 0.450592885
## [115] 0.454545455 0.458498024 0.462450593 0.466403162 0.470355731 0.474308300
## [121] 0.478260870 0.482213439 0.486166008 0.490118577 0.494071146 0.498023715
##
## $dbz
## [1] 0.256286821 0.546568420 0.952837510 1.397969554 1.818517774
## [6] 2.171517399 2.431835518 2.586947169 2.632367320 2.568479159
## [11] 2.398581638 2.127839581 1.762935993 1.312399163 0.787751723
## [16] 0.205768851 -0.407883853 -1.013892123 -1.555395592 -1.960157417
## [21] -2.155909549 -2.098691610 -1.796550260 -1.306688445 -0.708789499
## [26] -0.077950661 0.528174551 1.070090033 1.522194178 1.868123676
## [31] 2.097279981 2.202672411 2.179826470 2.026544105 1.743464190
## [36] 1.335558727 0.814865432 0.204765970 -0.454441082 -1.100884316
## [41] -1.653410996 -2.029142815 -2.175656259 -2.096353000 -1.844515099
## [46] -1.492349427 -1.102864557 -0.718933511 -0.365235922 -0.054692828
## [51] 0.205443392 0.409412480 0.550750500 0.621643480 0.613302099
## [56] 0.516918667 0.324973300 0.032925741 -0.358383402 -0.839054363
## [61] -1.384612777 -1.949022340 -2.459779620 -2.824432126 -2.959127196
## [66] -2.831252082 -2.480174531 -1.993584247 -1.465174899 -0.968259754
## [71] -0.550901214 -0.241471621 -0.055723628 -0.002093647 -0.084856825
## [76] -0.305480057 -0.662376918 -1.148854163 -1.748558382 -2.427506214
## [81] -3.123025247 -3.735547164 -4.140359499 -4.236959997 -4.011963346
## [86] -3.548246410 -2.968531294 -2.377735302 -1.842765671 -1.397726628
## [91] -1.055164236 -0.815182020 -0.671416274 -0.614833756 -0.636258901
## [96] -0.728051343 -0.884911360 -1.103495838 -1.380378027 -1.707835733
## [101] -2.067067325 -2.419280736 -2.698181090 -2.813739011 -2.680178203
## [106] -2.262694762 -1.603612522 -0.798765477 0.049284527 0.860873032
## [111] 1.584517262 2.190512434 2.662960956 2.993768597 3.178835139
## [116] 3.215817299 3.102884820 2.838125422 2.419508272 1.845626764
## [121] 1.117960269 0.246390726 -0.738560101 -1.761896007 -2.670695542
## [126] -3.229542302
dev.off()
## null device
## 1
acf(AVE_EP_d1_s12)
# doesn't look like white noise
# aic5.wge(AVE_EP_d1_s12,p=0:15) #13,0 12,2 12,0
# Fit an AR(12) to the (1-B)(1-B^12)-transformed series, i.e. an
# ARUMA(12,1,0) s=12 model for the raw price series.
EP_est = est.arma.wge(AVE_EP_d1_s12,p=12)
##
##
## Coefficients of AR polynomial:
## 0.0678 0.0697 -0.0351 0.0627 0.0391 0.0130 0.0534 0.0131 0.0759 -0.0687 -0.0137 -0.4792
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1+1.8492B+0.9205B^2 -1.0044+-0.2784i 0.9594 0.4570
## 1-1.8551B+0.9104B^2 1.0189+-0.2456i 0.9541 0.0377
## 1-1.3412B+0.9077B^2 0.7388+-0.7456i 0.9527 0.1257
## 1-0.4737B+0.8816B^2 0.2687+-1.0306i 0.9389 0.2094
## 1+1.2920B+0.8465B^2 -0.7631+-0.7739i 0.9201 0.3739
## 1+0.4610B+0.8443B^2 -0.2730+-1.0535i 0.9188 0.2904
##
##
# short 3 month ARUMA(12,1,0) s=12
# roll.win.ase.wge(df$AVG_EP,horizon=3,s=12,d=1,phis=EP_est$phi,thetas=EP_est$theta)
# 2.8495e-05 -> 0.0000285
# lastn=TRUE: forecast the LAST 3 in-sample observations for hold-out scoring.
f.short = fore.arima.wge(df$AVG_EP,phi=EP_est$phi,theta=EP_est$theta,s=12,d=1,n.ahead=3,lastn=TRUE)
ASE.short = mean((df$AVG_EP[264:266]-f.short$f)^2)
ASE.short #3.595854e-05 -> 0.000036
## [1] 3.595854e-05
# long 36 month ARUMA(12,1,0) s=12
# roll.win.ase.wge(df$AVG_EP,horizon=36,s=12,d=1,phis=EP_est$phi,thetas=EP_est$theta)
# 0.0004547268
f.long = fore.arima.wge(df$AVG_EP,phi=EP_est$phi,theta=EP_est$theta,s=12,d=1,n.ahead=36,lastn=TRUE)
ASE.long = mean((df$AVG_EP[(266-36+1):266]-f.long$f)^2)
ASE.long
## [1] 0.001223659
#0.001223659
# try other AIC5 options with long forecast: ARUMA(13,1,0) s=12
EP_est_13 = est.arma.wge(AVE_EP_d1_s12,p=13)
##
##
## Coefficients of AR polynomial:
## 0.1128 0.0693 -0.0277 0.0568 0.0365 0.0050 0.0535 0.0144 0.0724 -0.0676 -0.0225 -0.4814 0.0981
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1+1.8636B+0.9335B^2 -0.9982+-0.2736i 0.9662 0.4574
## 1-1.3203B+0.8982B^2 0.7350+-0.7571i 0.9477 0.1273
## 1-1.8307B+0.8877B^2 1.0311+-0.2516i 0.9422 0.0381
## 1-0.4434B+0.8830B^2 0.2510+-1.0341i 0.9397 0.2121
## 1+1.3242B+0.8664B^2 -0.7642+-0.7551i 0.9308 0.3760
## 1+0.4947B+0.8570B^2 -0.2886+-1.0409i 0.9257 0.2930
## 1-0.2010B 4.9750 0.2010 0.0000
##
##
# roll.win.ase.wge(df$AVG_EP,horizon=36,s=12,d=1,phis=EP_est_13$phi,thetas=EP_est_13$theta)
# 0.0004697769
# Hold-out (last 36) forecast and ASE for the ARUMA(13,1,0) s=12 candidate.
f = fore.arima.wge(df$AVG_EP,phi=EP_est_13$phi,theta=EP_est_13$theta,s=12,d=1,n.ahead=36,lastn=TRUE)
mean((df$AVG_EP[(266-36+1):266]-f$f)^2)
## [1] 0.001237814
#0.001237814
# try 12,2 ARUMA(12,1,2) s=12
EP_est_12_2 = est.arma.wge(AVE_EP_d1_s12,p=12,q=2)
##
##
## Coefficients of AR polynomial:
## -0.1173 -0.1001 0.0061 0.0723 0.0347 0.0251 0.0581 0.0310 0.0890 -0.0515 -0.0205 -0.5046
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1+1.8498B+0.9224B^2 -1.0027+-0.2806i 0.9604 0.4566
## 1-0.4248B+0.9030B^2 0.2353+-1.0257i 0.9502 0.2141
## 1-1.3025B+0.8935B^2 0.7288+-0.7668i 0.9453 0.1290
## 1+0.4967B+0.8923B^2 -0.2783+-1.0214i 0.9446 0.2923
## 1+1.3066B+0.8760B^2 -0.7458+-0.7651i 0.9359 0.3730
## 1-1.8084B+0.8674B^2 1.0424+-0.2573i 0.9313 0.0385
##
##
##
##
## Coefficients of MA polynomial:
## -0.2340 -0.2435
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1+0.2340B+0.2435B^2 -0.4806+-1.9687i 0.4935 0.2881
##
##
# roll.win.ase.wge(df$AVG_EP,horizon=36,s=12,d=1,phis=EP_est_12_2$phi,thetas=EP_est_12_2$theta)
# 0.0004835913
# Hold-out (last 36) forecast and ASE for the ARUMA(12,1,2) s=12 candidate.
f = fore.arima.wge(df$AVG_EP,phi=EP_est_12_2$phi,theta=EP_est_12_2$theta,s=12,d=1,n.ahead=36,lastn=TRUE)
mean((df$AVG_EP[(266-36+1):266]-f$f)^2)
## [1] 0.001275155
# 0.001275155 (an earlier run recorded 0.00128316; estimates vary slightly between runs)
# None of these beat Nick's ARUMA(13,1,5) s=12
# Try another model entirely - take 1st diff then fit an ARMA
AVE_EP_d1 = artrans.wge(df$AVG_EP,phi.tr=1)
# aic5.wge(AVE_EP_d1,p=0:15,q=0:5) #15,5
# aic5.wge(AVE_EP_d1,p=10:20,q=0:5) #15,5
# aic5.wge(AVE_EP_d1,p=0:15,q=0:5,type="bic") #12,0 13,1
# ARIMA(15,1,5)
EP_est_15_5 = est.arma.wge(AVE_EP_d1,p=15,q=5)
##
##
## Coefficients of AR polynomial:
## -0.1148 0.4747 0.2752 -0.1471 -0.7224 0.0089 -0.0739 0.0415 0.1338 -0.2228 -0.1037 0.2903 0.1106 -0.1338 -0.1832
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.7289B+0.9959B^2 0.8680+-0.5007i 0.9980 0.0833
## 1+0.9979B -1.0021 0.9979 0.5000
## 1+0.9941B+0.9922B^2 -0.5009+-0.8700i 0.9961 0.3331
## 1-0.0030B+0.8512B^2 0.0018+-1.0839i 0.9226 0.2497
## 1-1.0089B+0.7780B^2 0.6484+-0.9300i 0.8820 0.1531
## 1-1.6665B+0.7403B^2 1.1255+-0.2897i 0.8604 0.0401
## 1+1.5748B+0.7012B^2 -1.1230+-0.4063i 0.8374 0.4447
## 1+0.9554B+0.5405B^2 -0.8838+-1.0340i 0.7352 0.3626
##
##
##
##
## Coefficients of MA polynomial:
## -0.2191 0.4727 0.4063 -0.2067 -0.8269
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1+0.9732B -1.0275 0.9732 0.5000
## 1-1.6903B+0.9350B^2 0.9039+-0.5025i 0.9670 0.0807
## 1+0.9362B+0.9087B^2 -0.5151+-0.9139i 0.9532 0.3317
##
##
# short 3 month
# roll.win.ase.wge(df$AVG_EP,horizon=3,d=1,phis=EP_est_15_5$phi,thetas=EP_est_15_5$theta)
# 3.695999e-05
dev.off()
## null device
## 1
# 3-step hold-out forecast for ARIMA(15,1,5); ASE over the last 3 observations.
f.short.15.5 = fore.arima.wge(df$AVG_EP,phi=EP_est_15_5$phi,theta=EP_est_15_5$theta,d=1,n.ahead=3,lastn=TRUE)
ASE.short = mean((df$AVG_EP[264:266]-f.short.15.5$f)^2)
ASE.short # 4.567978e-05 (an earlier run recorded 4.216765e-05; estimates vary between runs)
## [1] 4.567978e-05
# 36 month ARIMA(15,1,5)
# roll.win.ase.wge(df$AVG_EP,horizon=36,d=1,phis=EP_est_15_5$phi,thetas=EP_est_15_5$theta)
# 0.0002368869
f.long.15.5 = fore.arima.wge(df$AVG_EP,phi=EP_est_15_5$phi,theta=EP_est_15_5$theta,d=1,n.ahead=36,lastn=TRUE)
ASE.long = mean((df$AVG_EP[(266-36+1):266]-f.long.15.5$f)^2)
ASE.long
## [1] 0.0001089089
# 0.0001089089
# ARIMA(12,1,0)
EP_est_12_0 = est.arma.wge(AVE_EP_d1,p=12)
##
##
## Coefficients of AR polynomial:
## 0.1026 0.0192 -0.0458 0.0443 -0.1428 -0.0133 -0.0959 -0.0076 0.0919 -0.1504 0.0576 0.3936
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.6643B+0.9156B^2 0.9089+-0.5159i 0.9568 0.0822
## 1-0.0267B+0.9047B^2 0.0148+-1.0513i 0.9511 0.2478
## 1+0.9674B+0.8990B^2 -0.5381+-0.9071i 0.9481 0.3352
## 1-0.9514B+0.8435B^2 0.5640+-0.9314i 0.9184 0.1633
## 1-0.9027B 1.1078 0.9027 0.0000
## 1+0.8985B -1.1130 0.8985 0.5000
## 1+1.5766B+0.7727B^2 -1.0202+-0.5033i 0.8790 0.4271
##
##
# short 3 month
# roll.win.ase.wge(df$AVG_EP,horizon=3,d=1,phis=EP_est_12_0$phi)
# 2.661653e-05
f.short.12.0 = fore.arima.wge(df$AVG_EP,phi=EP_est_12_0$phi,d=1,n.ahead=3,lastn=TRUE)
ASE.short = mean((df$AVG_EP[264:266]-f.short.12.0$f)^2)
ASE.short # 8.029078e-05 -> 0.0000803
## [1] 8.029078e-05
# 36 month ARIMA(12,1,0)
# roll.win.ase.wge(df$AVG_EP,horizon=36,d=1,phis=EP_est_12_0$phi)
# 0.0002323361
f.long.12.0 = fore.arima.wge(df$AVG_EP,phi=EP_est_12_0$phi,d=1,n.ahead=36,lastn=TRUE)
ASE.long = mean((df$AVG_EP[(266-36+1):266]-f.long.12.0$f)^2)
ASE.long
## [1] 0.0001502498
# 0.0001502498
plotts.sample.wge(df$AVG_EP)
## over-fit like our lives depend on it
# High-order Burg overfit of the raw series to expose near-unit roots.
overfit_ar_30_ricco=est.ar.wge(df$AVG_EP,p=50, type='burg')
# Clearly a 1-B in there. try removing it and over-fitting again.
AVG_E_i1_ricco = artrans.wge(df$AVG_EP, c(1))
overfit_ar_30_i1_ricco=est.ar.wge(AVG_E_i1_ricco,p=20, type='burg')
# Nonstationary factor built from the near-unit-root factors of the overfit:
# (1-1.7276B+0.9942B^2)(1+0.9951B)(1+0.0130B+0.9896B^2)(1+0.9916B+0.9889B^2);
# mult.wge takes the AR-style phis (negated polynomial coefficients).
nonstationary_lambda_ricco = mult.wge(c(1.7276,-0.9942), c(-0.9951), c(-0.0130,-0.9896), c(-0.9916,-0.9889))
# BUG FIX: the lines below referenced the undefined name 'nonstationary_lambda';
# they now use 'nonstationary_lambda_ricco' defined just above.
nonstationary_lambda_ricco$model.coef
# Transform the series by the full nonstationary operator, then pick ARMA
# orders for the residual (stationary) part by BIC.
AVG_E_lambda_corrected_ricco = artrans.wge(df$AVG_EP, c(nonstationary_lambda_ricco$model.coef))
aic_lambda_corrected_ricco = aic.wge(AVG_E_lambda_corrected_ricco, p=0:12, q=0:5, type="bic")
# Hold-out (last 36) forecast with the lambda operator, and a plain AR(20)
# forecast on the differenced series for comparison.
forecast_arma = fore.aruma.wge(df$AVG_EP, phi=aic_lambda_corrected_ricco$phi, theta=aic_lambda_corrected_ricco$theta, d=1, n.ahead = 36,limits = FALSE, plot=TRUE, lastn = TRUE, lambda =c(nonstationary_lambda_ricco$model.coef))
forecast_arma_ricco = fore.aruma.wge(df$AVG_EP, phi=overfit_ar_30_i1_ricco$phi,n.ahead = 36,limits = FALSE, plot=TRUE, lastn = TRUE)
## ^^^ YUCK! that looks terrible. let's try a different approach.
# try finding best long non-stationary estimates Seasonality by rolling window rmse (this has O(n^2) runtime complexity)
# Grid search over seasonal order s (0:10) and difference order d (0:2),
# scoring each (s, d) pair by the 36-step rolling-window RMSE of the pure
# difference/seasonal model (no ARMA part yet).
best_s_ricco = 0
best_d_ricco = 0
max_s_ricco = 10
max_d_ricco = 2
# Running best score, initialized above any plausible RMSE for this series.
best_rsme_seasonality_search_and_wandering_search = 10000.0
for (s_try in 0:max_s_ricco) {
for (d_try in 0:max_d_ricco) {
rolling.res.long_ricco = roll.win.rmse.wge_ricco(df$AVG_EP, d = d_try, s=s_try, horizon = 36, plot=FALSE)
if(rolling.res.long_ricco$rwRMSE < best_rsme_seasonality_search_and_wandering_search) {
best_rsme_seasonality_search_and_wandering_search = rolling.res.long_ricco$rwRMSE
best_s_ricco = s_try
best_d_ricco = d_try
}
}
}
print(paste("best long rolling window s was:", best_s_ricco))
print(paste("best long rolling window d was:", best_d_ricco))
# best s=0 and d=1
# Re-create the first difference of the series (the search above chose s=0, d=1).
AVG_E_i1_ricco = artrans.wge(df$AVG_EP, c(1))
# Wide (coarse) grid search for ARMA orders (p, q) on the differenced series.
# Each candidate is scored by a cost that averages the 36-step rolling-window
# RMSE with the ASE of the last-36 hold-out forecast of the SAME candidate.
best_p_wide_ricco = 0
best_q_wide_ricco = 0
best_cost_wide_ricco = 10000
best_rsme_s0_d1_p_search_wide = 10000.0
best_last36_ase_wide = 10000.0
q_seq_wide_ricco = seq(1, 11, 5)   # q in {1, 6, 11}
p_seq_wide_ricco = seq(0, 30, 10)  # p in {0, 10, 20, 30}
for (q_try in q_seq_wide_ricco) {
  for (p_try in p_seq_wide_ricco) {
    # est.arma.wge can fail for some (p, q); tryCatch logs and moves on.
    tryCatch(expr = {
      print(paste("trying p: ", p_try, "and q: ", q_try))
      # BUG FIX: capture.output(file='NUL') only works on Windows; nullfile()
      # is the portable null device (this script runs on macOS per setwd).
      capture.output(est_p_q_search_ricco <- est.arma.wge(AVG_E_i1_ricco, p=p_try, q=q_try), file=nullfile())
      rolling.res.long_ricco = roll.win.rmse.wge_ricco(df$AVG_EP, phi=est_p_q_search_ricco$phi, theta=est_p_q_search_ricco$theta, d=1, s=0, horizon = 36, plot=FALSE)
      # BUG FIX: the hold-out forecast now includes the candidate's theta
      # (previously dropped), so it scores the same model as the rolling window.
      capture.output(f_ricco <- fore.aruma.wge(df$AVG_EP, phi=est_p_q_search_ricco$phi, theta=est_p_q_search_ricco$theta, d=1, s=0, n.ahead=36, lastn=TRUE, plot=FALSE))
      # BUG FIX: score THIS candidate's forecast f_ricco; the old code read a
      # stale global 'f' left over from an earlier model fit.
      prediction_ase_ricco = mean((df$AVG_EP[(266-35):266] - f_ricco$f)^2)
      # weight last 36 higher because they are the forecasts we should care about most
      cost_ricco = (rolling.res.long_ricco$rwRMSE + prediction_ase_ricco)/2
      if (cost_ricco < best_cost_wide_ricco) {
        best_cost_wide_ricco = cost_ricco
        best_rsme_s0_d1_p_search_wide = rolling.res.long_ricco$rwRMSE
        best_last36_ase_wide = prediction_ase_ricco
        best_p_wide_ricco = p_try
        best_q_wide_ricco = q_try
      }
    },
    error = function(e) {
      # NOTE: the old 'was_error' flag set here never propagated out of the
      # handler (local assignment), so it was dead code and has been removed;
      # tryCatch already continues with the next candidate.
      print(paste("an error occured for p: ", p_try, "and q: ", q_try))
    })
  }
}
print(paste("best long + immediate rolling window p with wide search was:", best_p_wide_ricco))
print(paste("best long + immediate rolling window q with wide search was:", best_q_wide_ricco))
# Narrow (fine) grid search for (p, q) bracketing the wide-search winner.
best_p_narrow_ricco = 0
best_q_narrow_ricco = 0
best_cost_narrow_ricco = 10000
best_rsme_s0_d1_p_search_narrow = 10000.0
best_last36_ase_narrow = 10000.0
# Bracket the wide winner: q in [best_q-2, best_q+1], p in [best_p-4, best_p+2],
# clamped at 1 so the sequences stay valid.
min_q_narrow_ricco = max(c(1, best_q_wide_ricco-2))
min_p_narrow_ricco = max(c(1, best_p_wide_ricco-4))
q_seq_narrow_ricco = seq(min_q_narrow_ricco, best_q_wide_ricco+1, 1)
p_seq_narrow_ricco = seq(min_p_narrow_ricco, best_p_wide_ricco+2, 1)
# Holds the est.arma.wge result of the winning candidate for later use.
best_arma_estimates = NULL
for (q_try in q_seq_narrow_ricco) {
  for (p_try in p_seq_narrow_ricco) {
    tryCatch(expr = {
      print(paste("trying p: ", p_try, "and q: ", q_try))
      # BUG FIX: nullfile() instead of the Windows-only 'NUL' sink.
      capture.output(est_p_q_search_ricco <- est.arma.wge(AVG_E_i1_ricco, p=p_try, q=q_try), file=nullfile())
      rolling.res.long_ricco = roll.win.rmse.wge_ricco(df$AVG_EP, phi=est_p_q_search_ricco$phi, theta=est_p_q_search_ricco$theta, d=1, s=0, horizon = 36, plot=FALSE)
      # BUG FIX: include the candidate's theta in the hold-out forecast and
      # score f_ricco (not the stale global 'f'), as in the wide search.
      capture.output(f_ricco <- fore.aruma.wge(df$AVG_EP, phi=est_p_q_search_ricco$phi, theta=est_p_q_search_ricco$theta, d=1, s=0, n.ahead=36, lastn=TRUE, plot=FALSE))
      prediction_ase_ricco = mean((df$AVG_EP[(266-35):266] - f_ricco$f)^2)
      # weight last 36 higher because they are the forecasts we should care about most
      cost_ricco = (rolling.res.long_ricco$rwRMSE + prediction_ase_ricco)/2
      if (cost_ricco < best_cost_narrow_ricco) {
        best_cost_narrow_ricco = cost_ricco
        best_rsme_s0_d1_p_search_narrow = rolling.res.long_ricco$rwRMSE
        best_last36_ase_narrow = prediction_ase_ricco
        best_p_narrow_ricco = p_try
        best_q_narrow_ricco = q_try
        best_arma_estimates = est_p_q_search_ricco
      }
    },
    error = function(e) {
      # Dead 'was_error' flag removed (see wide search); just log and continue.
      print(paste("an error occured for p: ", p_try, "and q: ", q_try))
    })
  }
}
print(paste("best long + immediate rolling window p with narrow search was:", best_p_narrow_ricco))
print(paste("best long + immediate rolling window q with narrow search was:", best_q_narrow_ricco))
best_rsme_s0_d1_p_search_narrow^2  # rolling-window MSE of the winner
best_last36_ase_narrow             # hold-out ASE of the winner
length(df$AVG_EP)
# best P was p=30 q=11, WOW! A huge and complex model BUT it was rolling window RSME verified!
# I wonder how the ARIMA(30,1,11) would do on future, unseen data. it’s probably just REALLY good at fitting the signal we have, even if we rolling window verify. true cross-validation would be better, if we had the data.
# Summarize the narrow-search winner.
# BUG FIX: this print referenced undefined names (best_rsme_s0_d1_p_search,
# best_p_ricco, best_q_ricco); the narrow-search results are used instead.
print(paste("best long rolling (36) ase was:", best_rsme_s0_d1_p_search_narrow^2, "for p:", best_p_narrow_ricco, "for q:", best_q_narrow_ricco, "and d = 1"))
# last 3 ASE
f_ricco_3 = fore.aruma.wge(df$AVG_EP, phi=best_arma_estimates$phi, theta = best_arma_estimates$theta, d=1, s=0, n.ahead=3, lastn=TRUE)
# BUG FIX: the last 3 observations are 264:266 = (266-2):266; the old
# (266-3):266 selected FOUR values and silently recycled the 3 forecasts.
prediction_ase_ricco_3 = mean((df$AVG_EP[(266-2):266]-f_ricco_3$f)^2)
print(paste("Last 3 ASE was s:", prediction_ase_ricco_3, "for p:", best_p_narrow_ricco, "q:" , best_q_narrow_ricco, "and d = 1"))
# last 36 ASE
f_ricco_36 = fore.aruma.wge(df$AVG_EP, phi=best_arma_estimates$phi, theta = best_arma_estimates$theta, d=1, s=0, n.ahead=36, lastn=TRUE)
prediction_ase_ricco_36 = mean((df$AVG_EP[(266-35):266]-f_ricco_36$f)^2)
print(paste("Last 36 ASE was s:", prediction_ase_ricco_36, "for p:", best_p_narrow_ricco, "q:" , best_q_narrow_ricco, "and d = 1"))
rolling.res.short_ricco = roll.win.rmse.wge_ricco(df$AVG_EP, phi=best_arma_estimates$phi, theta=best_arma_estimates$theta, d=1, s=0, horizon = 3, plot=FALSE)
# Rolling-window MSE at the 3-step horizon.
rolling.res.short_ricco$rwRMSE^2
# In-sample AIC for the winning ARMA fit on the differenced series
# (n = 266 observations, k = p + q + 1 = 30 + 11 + 1 parameters).
best_arma_estimates_SSE = sum(best_arma_estimates$res^2)
aic = 266*log(best_arma_estimates_SSE/266) +2*(30+11+1)
aic
# residuals look VERY white, maybe a slight amount of heteroskedasticity and trend at the very end?
# BUG FIX: check whiteness on the selected model's residuals; the old code
# used 'f_ricco$resid', a stale loop temporary from whichever candidate the
# grid search happened to try last, not the chosen model.
plotts.sample.wge(best_arma_estimates$res)
# Ljung-Box test for remaining autocorrelation in the same residuals.
ljung.wge(best_arma_estimates$res)
best_arma_estimates
# Appendix: a modified version of the current roll.win.rmse.wge on GitHub
# (uses fore.aruma.wge from CRAN and doesn't print). To use
# the latest tswge, replace fore.aruma.wge with
# fore.arima.wge.
#Rolling Window RMSE Function
# series is the array of the series
# horizon is how far you want to predict into the future
# d is the order of the differencing: (1-B)^d
# s is the order of the seasonality: (1-B^s)
# phi = coefficients of the stationary AR term
# theta = coefficients of the invertible MA term
# It simply takes the given horizon and the model in the form of s,d,phis and
# thetas and figures out how many windows it can create in the data (series) and then calculates the ASE for each window.
#The output is the average off all the ASEs from each individual window.
roll.win.rmse.wge_ricco = function(series, horizon = 2, s = 0, d = 0, phi = 0, theta = 0, plot=FALSE)
{
#DEFINE fore.arma.wge2
fore.arma.wge2=function(x,phi=0,theta=0,n.ahead=5,lastn=FALSE, plot=FALSE,alpha=.05,limits=TRUE, xbar2 = NULL)
{
# lastn=TRUE indicates that the last n data values are to be forecast
# ---- Tail of a locally modified copy of tswge's fore.arma.wge ----
# lastn=FALSE (default) indicates we want forecasts for n values beyond the end of the realization;
# lastn=TRUE instead forecasts the last n.ahead observed values (for out-of-sample assessment)
n=length(x)
p=length(phi)
# A zero phi vector (e.g. phi = 0) is treated as "no AR part" (order 0)
if(sum(phi^2)==0) {p=0}
q=length(theta)
# Likewise a zero theta vector is treated as "no MA part"
if(sum(theta^2)==0) {q=0}
#resid=rep(0,n)
npn.ahead=n+n.ahead
xhat=rep(0,npn.ahead)
# Process mean: use the sample mean unless the caller supplied an override in xbar2
if(is.null(xbar2))
{
xbar=mean(x)
}
else
{
xbar = xbar2
}
# const = 1 - phi_1 - ... - phi_p; xbar*const is the constant term of the
# ARMA difference equation used in the forecast recursion below
const=1
if (p > 0) {for(jp in 1:p) {const=const-phi[jp]}}
#
#
# Calculate Box-Jenkins Forecasts
#
#
#Calculating Residuals (via backcasting, 50 back-steps)
#
resid=backcast.wge(x,phi,theta,n.back=50)
#
#
# Older in-line residual computation, kept for reference:
#maconst=const*xbar
#p1=max(p+1,q+1)
#for (i in p1:n) {resid[i]=x[i]
# if ( p > 0) {for (jp in 1:p) {resid[i]=resid[i]-phi[jp]*x[i-jp]}}
# if (q > 0) {for (jq in 1:q) {resid[i]=resid[i]+theta[jq]*resid[i-jq]}}
# resid[i]=resid[i]-maconst}
#
# Calculating Forecasts
#
#
npn.ahead=n+n.ahead
xhat=rep(0,npn.ahead)
# mm is the forecast origin: the index of the last observation used before forecasting
mm=n
#
#lastn = TRUE
#
if(lastn==TRUE) {mm=n-n.ahead}
#
#
# Seed xhat with the observed series up to the forecast origin
for (i in 1:mm) {xhat[i]=x[i]}
# Difference-equation forecasts: the AR part uses prior xhat values, the MA part
# uses backcast residuals (it contributes only for horizons h <= q), and each
# step adds the constant term xbar*const
for (h in 1:n.ahead) {
if (p > 0) {for (jp in 1:p) {xhat[mm+h]=xhat[mm+h]+phi[jp]*xhat[mm+h-jp]}}
if ((h<=q)&(h>0)) {for(jq in h:q) {xhat[mm+h]=xhat[mm+h]-theta[jq]*resid[mm+h-jq]}}
xhat[mm+h]=xhat[mm+h]+xbar*const}
#
#
# Calculate psi weights for forecasts limits
#
#
xi=psi.weights.wge(phi,theta,lag.max=n.ahead)
#
#
#
#Setting up for plots (fplot/llplot/ulplot each carry the origin value x[mm]
#in position 1 so the forecast traces connect to the realization)
nap1=n.ahead+1
fplot=rep(0,nap1)
maxh=mm+n.ahead
llplot=rep(0,nap1)
ulplot=rep(0,nap1)
f=rep(0,nap1)
ll=rep(0,nap1)
ul=rep(0,nap1)
# wnv: white-noise variance estimate = mean of the squared backcast residuals
wnv=0
xisq=rep(0,n.ahead)
se=rep(0,n.ahead)
se0=1
for (i in 1:n) {wnv=wnv+resid[i]**2}
wnv=wnv/n
# xisq[h] accumulates 1 + psi_1^2 + ... + psi_{h-1}^2, so se[h] is the
# h-step-ahead forecast standard error sqrt(wnv * xisq[h])
xisq[1]=1
# NOTE(review): when n.ahead == 1 the sequence 2:n.ahead is c(2, 1), so this
# loop runs "backwards" and the i=1 iteration assigns a zero-length value
# (xisq[0] + xi[0]^2), which errors — confirm n.ahead > 1 is always used,
# or guard with seq_len()-style indexing
for (i in 2:n.ahead) {xisq[i]=xisq[i-1]+xi[i-1]^2}
for (i in 1:n.ahead) {se[i]=sqrt(wnv*xisq[i])}
fplot[1]=x[mm]
for (i in 1:n.ahead) {fplot[i+1]=xhat[mm+i]}
ulplot[1]=x[mm]
#for (i in 1:n.ahead) { ulplot[i+1]=fplot[i+1]+1.96*se[i]}
# -qnorm(alpha/2) = qnorm(1 - alpha/2) is positive; this generalizes the
# hard-coded 1.96 (alpha = 0.05) in the commented line above
for (i in 1:n.ahead) { ulplot[i+1]=fplot[i+1]-qnorm(alpha/2)*se[i]}
llplot[1]=x[mm]
#for (i in 1:n.ahead) { llplot[i+1]=fplot[i+1]-1.96*se[i]}
for (i in 1:n.ahead) { llplot[i+1]=fplot[i+1]+qnorm(alpha/2)*se[i]}
#
# Axis range for the (currently disabled) plot: include the limit bands only
# when limits==TRUE
if(limits==FALSE) {
if(lastn==TRUE) {max=max(x,xhat[1:n])
min=min(x,xhat[1:n])}
else {max=max(x,xhat)
min=min(x,xhat)}}
if(limits==TRUE) {min=min(x,llplot)
max=max(x,ulplot)}
#numrows <- 1
#numcols <- 1
timelab <- 'Time'
valuelab <- ''
#fig.width <- 5
#fig.height <- 2.5
cex.labs <- c(.8,.7,.8)
#par(mfrow=c(numrows,numcols),mar=c(6,2,3,1))
t<-1:n;
np1=n+1
np.ahead=mm+n.ahead
tf<-mm:np.ahead
# Plotting is intentionally disabled in this modified copy (all commented out)
# so the rolling-window loop below can call it silently:
#if (plot=='TRUE') {
#fig.width <- 5
#fig.height <- 2.5
#cex.labs <- c(1.2,1.2,1.2)
#par(mfrow=c(numrows,numcols),mar=c(9,4,3,2))
#plot(t,x,type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(1,maxh),ylim=c(min,max),col=1)
#axis(side=1,cex.axis=1.1,mgp=c(3,0.15,0),tcl=-.3);
#axis(side=2,las=1,cex.axis=1.1,mgp=c(3,.4,0),tcl=-.3)
#abline=mean(x)
#mtext(side=c(1,2,1),cex=cex.labs,text=c(timelab,valuelab,""),line=c(1.2,2.1,1.8))
#points(tf,fplot,type='o',lty=1,cex=.6,lwd=1,pch=1,col=2);
#if(limits=='TRUE') {points(tf,ulplot,type='l',lty=2,cex=0.6,lwd=.75,pch=1,col=4)
#points(tf,llplot,type='l',lty=3,cex=0.6,lwd=.75,pch=1,col=4)
# }
#}
np1=n+1
nap1=n.ahead+1
# f: the n.ahead forecasts (dropping the origin value in position 1)
f=fplot[2:nap1]
# Calculate RMSE and MAD against the held-out last n.ahead observations
if(lastn==TRUE){
t.start=n-n.ahead
sum.rmse=0
sum.mad=0
for(i in 1:n.ahead) {sum.rmse=sum.rmse+(f[i]-x[t.start+i])^2
sum.mad=sum.mad+abs(f[i]-x[t.start+i])}
mse=sum.rmse/n.ahead
rmse=sqrt(mse)
mad=sum.mad/n.ahead
}
ll=llplot[2:nap1]
ul=ulplot[2:nap1]
# NOTE(review): both branches return only the forecasts; the rmse/mad/ll/ul
# computed above are discarded. The caller below only uses $f, so this is
# consistent, but the lastn branch's extra work is currently unused.
if(lastn==TRUE){out1=list(f=f)}
if(lastn==FALSE){out1=list(f=f)}
return(out1)
}
# ---- Tail of the rolling-window RMSE routine ----
# Slides a training window of length trainingSize across `series`, forecasts
# `horizon` steps from each window, and records the RMSE of each window's
# forecasts against the observations that immediately follow it.
numwindows = 0
RMSEHolder = numeric()
# NOTE(review): scalar condition — `&&` would be the idiomatic choice here,
# though `&` gives the same result for scalar s and d
if(s == 0 & d == 0)
{
# Pure ARMA case: forecast with the modified fore.arma.wge2 defined above
trainingSize = max(length(phi),length(theta)) + 1 # The plus 1 is for the backcast residuals which helps with ARMA model with q > 0
numwindows = length(series)-(trainingSize + horizon) + 1
RMSEHolder = numeric(numwindows)
# print(paste("Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with", numwindows, "windows."))
for( i in 1:numwindows)
{
# Window i covers series[i .. i+trainingSize-1]; forecasts are scored
# against the next `horizon` observations.
# NOTE(review): this passes `xbar =` while the forecast function reads
# `xbar2` internally — confirm the parameter is actually named xbar (and
# forwarded to xbar2) rather than silently dropped
forecasts <- fore.arma.wge2(series[i:(i+(trainingSize-1))], plot = TRUE, phi = phi, theta = theta,n.ahead = horizon, xbar = mean(series))
RMSE = sqrt(mean((series[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$f)^2))
RMSEHolder[i] = RMSE
}
}
else
{
# Seasonal and/or differenced case: fall back to tswge's fore.aruma.wge
trainingSize = sum(length(phi),length(theta),s, d) + 1 # sum and plus one is to help backcast.wge, lag.max and ylim plotting issue in fore.arima.wge
numwindows = length(series)-(trainingSize + horizon) + 1
RMSEHolder = numeric(numwindows)
# print(paste("Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with", numwindows, "windows."))
for( i in 1:numwindows)
{
#invisible(capture.output(forecasts <- fore.arima.wge(series[i:(i+(trainingSize-1))],phi = phis, theta = thetas, s = s, d = d,n.ahead = horizon)))
forecasts <- fore.aruma.wge(series[i:(i+(trainingSize-1))],phi = phi, s = s, d = d, theta = theta,n.ahead = horizon, plot=plot)
RMSE = sqrt(mean((series[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$f)^2))
RMSEHolder[i] = RMSE
}
}
# NOTE(review): bare expression — auto-printing only happens at top level,
# so inside this function the next line has no visible effect (leftover debug?)
RMSEHolder
#hist(RMSEHolder, main = "RMSEs for Individual Windows")
# Overall rolling-window RMSE is the mean of the per-window RMSEs
WindowedRMSE = mean(RMSEHolder)
# print("The Summary Statistics for the Rolling Window RMSE Are:")
# print(summary(RMSEHolder))
# print(paste("The Rolling Window RMSE is: ",round(WindowedRMSE,3)))
#output
# Returned invisibly so interactive calls don't spam the console
invisible(list(rwRMSE = WindowedRMSE, trainingSize = trainingSize, numwindows = numwindows, horizon = horizon, s = s, d = d, phi = phi, theta = theta, RMSEs = RMSEHolder))
}